---
title: "AITrustConjoint_Final_ReplicationKit"
author: "Adi Rao"
date: "2023-04-19"
output: html_document
---


```{r Data Preliminaries}

    ## Load packages - all script dependencies for cleaning, here.

        pacman::p_load(tidyverse, rio, vroom, ggthemes, ggsci, modelsummary, marginaleffects,
                         fixest, kableExtra, gtsummary, data.table, fastDummies, emmeans, patchwork, GGally, caret)
 

    ## Import data

        dat <- rio::import("/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/MasterData2.csv") %>% 
                  filter(Finished == 1)
        
    ## Isolate Demographic Variables
        
        ## Isolate variable names for subsetting data
        
        depvars_old <- c(names(dat)[str_detect(names(dat),"_dv_")])
        
        depvars_new <-  sub( "_1$", "_support", depvars_old)
        depvars_new <-  sub( "_2$", "_trust", depvars_new)
        depvars_new <-  sub( "_3$", "_benefit_likert", depvars_new)
        
        
        cj_attributes <- names(dat)[str_detect(names(dat),"F-")]
        
        demo_vars <- names(dat)[!names(dat) %in% c(all_of(depvars_old), all_of(cj_attributes)) ]
        
      ## Pull Demographic Variables out, tied to id
        
        demo_post <- dat %>%
          select(c(rid, all_of(demo_vars))) %>%
          rename(id = ResponseId) 
    
        
        demo_vars <- c("gender", "political_party", "hhi", "education", "age")
        depvars <- c("support", "trust", "paradox")
        

        
        
  ## Create Conjoint Data   
        
        temp <- dat %>%
          select(c("ResponseId", all_of(depvars_old), all_of(cj_attributes))) %>%
          rename(id = ResponseId) %>%
          mutate(across(all_of(depvars_old), ~ as.character(.x))) %>% 
          rename_at(vars(depvars_old), ~depvars_new) %>% 
          rename_with(stringr::str_replace_all, 
                      pattern = "F-", replacement = "cj-", 
                      matches("F-")) %>%
          rename_with(stringr::str_replace_all, 
                    pattern = "conjoint_dv_", replacement = "cj-", 
                    matches("conjoint_dv_")) %>%
          pivot_longer(
            cols = starts_with("cj-"),
            names_to = "var_id",
            names_prefix = "cj_",
            values_to = "value",
            values_drop_na = TRUE
                )  %>%
          separate(var_id, c("cj_id", "varnames"), "cj-") %>%
          filter(nchar(varnames) > 4) %>% 
          mutate(cj_id = substr(varnames, 1,1)) %>% 
          mutate(varnames = case_when(cj_id == 1 ~ substr(varnames, 3, nchar(varnames)) ,
                                      TRUE ~ str_remove_all(string = varnames, pattern = paste0(cj_id, "_")))) %>% 
          filter(!(varnames %in% c("1-5", "1-6")))
        
      ## Rename variable stubs with conjoint variables
        
        temp$varnames[str_detect(temp$varnames, "1-1")] <- "domain"
        temp$varnames[str_detect(temp$varnames, "1-2")] <- "autonomy"
        temp$varnames[str_detect(temp$varnames, "1-3")] <- "precision"
        temp$varnames[str_detect(temp$varnames, "1-4")] <- "regulation"
        
      
      ## Create conjoint data for analysis
        
        conjoint_data <- temp %>%
          filter(!duplicated(temp[c("id", "cj_id", "varnames")])) %>% 
          pivot_wider(id_cols =  c(id, cj_id),
                      names_from = varnames) %>%
          
          mutate(paradox = as.numeric(trust) - as.numeric(support)) %>%
          left_join(demo_post, by = "id") %>% 
          mutate(age_0 = as.numeric(age)) %>% 
          filter(age_0 >= 18) %>% 
          filter(!is.na(age_0)) %>% 
          mutate(age_msn = case_when(age_0 == 18 | age_0 == 19 ~ "18-19",
                                     age_0>=20 & age_0<=34 ~ "20-34",
                                     age_0>=35 & age_0<=44 ~ "35-44",
                                     age_0>=45 & age_0<=54 ~ "45-54",
                                     age_0>=55 & age_0<=64 ~ "55-64",
                                     age_0>=65 ~ "65+")) %>% 
          mutate(across(all_of(depvars), ~ as.numeric(.x)),
                 across(all_of(demo_vars), ~ as.factor(.x))) 

        
        





```




```{r Descriptive Exhibits}


  
  # We will use tabData just for descriptive statistics if we want to group certain variables (e.g., age) into more readable, cleaer format in the summaries.
  tabData <- dat %>% 
  
    
    mutate(
      
  gender = case_when(gender == 1 ~ "Male",
                     gender == 2 ~ "Female/Other"),
  
  
  party3 = case_when(Q21 == 1 ~ "Extremely Liberal",
                     Q21 == 2 ~ "Liberal",
                     Q21 == 3 ~ "Slightly Liberal",
                     Q21 == 4 ~ "Moderate/Unsure",
                     Q21 == 5 ~ "Slightly Conservative",
                     Q21 == 6 ~ "Conservative",
                     Q21 == 7 ~ "Extremely Conservative",
                     Q21 == 8 ~ "Moderate/Unsure"),
  
  ethnicity = case_when(Q3 == 1 ~ "American Indian and Alaskan Native",
                        Q3 == 2 ~ "Asian",
                        Q3 == 3 ~ "Black",
                        Q3 == 4 ~ "Hispanic/Latino",
                        Q3 == 5 ~ "Native Hawaiian and Other Pacific Islander",
                        Q3 == 6 ~ "White, Non-Hispanic"),
  
  income = case_when(Q19 == 1 ~ "l.t. 10,000",
                     Q19 == 2 ~ "10,000 to 24,999",
                     Q19 == 3 ~ "25,000 to 49,999",
                     Q19 == 4 ~ "50,000 to 74,999",
                     Q19 == 5 ~ "75,000 to 99,999",
                     Q19 == 6 ~ "100,000+"),
  
  education = case_when(Q4 == 1 ~ "l.t. HS",
                        Q4 == 2 ~ "High School / GED",
                        Q4 == 3 ~ "Some College",
                        Q4 == 4 ~ "2-year College Degree",
                        Q4 == 5 ~ "4-year College Degree",
                        Q4 == 6 ~ "Post-Baccalaureate Degree +")
  
    )  %>% 
    mutate(age_msn = case_when(age == 18 | age == 19 ~ "18-19",
                               age>=20 & age<=34 ~ "20-34",
                               age>=35 & age<=44 ~ "35-44",
                               age>=45 & age<=54 ~ "45-54",
                               age>=55 & age<=64 ~ "55-64",
                               age>=65 ~ "65+"))


### The actual conjoint data we will be using. We do some minor cleaning here: mostly just renaming variables or changing some values (e.g., the few "don't know" respondents for party are now independents.)

conjoint_data2 <- conjoint_data %>% 
  
  mutate(
    
    gender = case_when(Q1 == 1 ~ "Male",
                       Q1 == 2 ~ "Female/Other",
                       Q1 == 3 ~ "Female/Other"),
    
    
    party3 = case_when(Q21 == 1 ~ 1,
                       Q21 == 2 ~ 2,
                       Q21 == 3 ~ 3,
                       Q21 == 4 ~ 4,
                       Q21 == 5 ~ 5,
                       Q21 == 6 ~ 6,
                       Q21 == 7 ~ 7,
                       Q21 == 8 ~ 4),
    
    
    income = case_when(Q19 == 1 ~ 1,
                       Q19 == 2 ~ 2,
                       Q19 == 3 ~ 3,
                       Q19 == 4 ~ 4,
                       Q19 == 5 ~ 5,
                       Q19 == 6 ~ 6),
    
    ethnicity = case_when(Q3 == 6 ~ "White",
                          Q3 == 2 ~ "Non-white",
                          Q3 == 3 ~ "Non-white",
                          Q3 == 4 ~ "Non-white",
                          Q3 == 5 ~ "Non-white",
                          Q3 == 1 ~ "Non-white"),
    
    education = case_when(Q4 == 1 ~ 1,
                          Q4 == 2 ~ 2,
                          Q4 == 3 ~ 3,
                          Q4 == 4 ~ 4,
                          Q4 == 5 ~ 5,
                          Q4 == 6 ~ 6)
    
  ) 



### A summary stats table for the conjoint figures to be used as needed. This is where the tabData can come in.

datasummary(party3 + gender + income + ethnicity + education + age_msn ~ 1 * (N  + (`%` = Percent())), 
            data = tabData,
            output = "/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/summary_statistics_check.docx")

```




```{r Analysis}


# Create macros for analysis ----------------------------------------------


# Reviewers asked to add mediators into the regression, so let's rename those first. (We recommended not to add mediators to the regression for fear of post-treatment bias, but the code is here just in case.)

conjoint_data2 <- conjoint_data2 %>% 
  rename("FOMO" = "Q12",
         "CostBenefit"="Q13",
          "Efficiency"="Q14",
          "Optimism"="Q15",
          "Transparency"="Q17")


    demo_vars <- c("gender", "party3", "income", "education", "ethnicity", "age_0")
    depvars <- c("support", "trust", "paradox")
    mediator_vars <- c("FOMO", "CostBenefit", "Efficiency", "Optimism", "Transparency")
    

  # Function to create marginal means plots
    
    create_mmPlots <- function(model,
                               depVar){
      
                          marginalmeans(model) %>% 
                            mutate(print_est = paste0(round(estimate, 2))) %>%
                            mutate(term = str_to_title(term),
                                   value = str_to_title(value)) %>%
                            mutate(value=fct_relevel(value,"Moderate Precision (Correct 85% Of The Time With 15% False Positives)",
                                                     "Substantial Precision (Correct 90% Of The Time With 10% False Positives)",
                                                     "Maximum Precision (Correct 99% Of The Time With 1% False Positives)")) %>%
                            ggplot(aes(x = estimate, y = value, color = factor(term), fill = factor(term), group = factor(term))) +  
                            geom_linerange(aes(xmin = conf.low, xmax = conf.high)) + 
                            geom_label(aes(label = print_est), color = 'white') +
                            xlab(paste0("Marginal Mean ", depVar)) +
                            ylab(" ") +
                            facet_wrap(term~., scales = 'free_y', ncol = 1) +
                            theme_minimal() +
                            
                            scale_color_nejm() +
                            scale_fill_nejm() +
                            
                            theme(legend.position = 'none',
                                  strip.text = element_text(face = 'bold'))
      
                          }
    
  # Holder list for conjoint results  
    cjResults <- list()
  
  # Function to clean conjoint output 
    clean_cjOutput <- function(model){
      
      
          model %>% 
        
            ## Coerce results to data frame with CI using broom::tidy, clean up coefficient names
              broom::tidy(conf.int = T) %>% 
              filter( term != "(Intercept)") %>% 
              separate(term, c("attribute", "level"), "::") %>%
              mutate(level = str_to_title(level),
                     attribute = str_to_title(attribute)) %>% 
            
            ## Filter coefficients to be varied conjoint attributes  
              filter(attribute %in% c("Domain", "Autonomy", "Regulation", "Precision")) %>% 
              mutate(print_est = paste0(round(estimate, 3), case_when(p.value <= 0.05 ~ "*",
                                                                      p.value <= 0.05 ~ "*",
                                                                      p.value <= 0.05 ~ "*",
                                                                      TRUE ~ ""))) %>%
            
            ## Add 0 rows to data frame for attribute reference categories
              add_row(attribute = "Domain", level = "Cars",
                      estimate = 0, std.error = 0, statistic = 0, p.value = 0, conf.low = 0, conf.high = 0, print_est = "Reference Level") %>% 
              
              add_row(attribute = "Autonomy", level = "No Autonomy, Full Human Oversight",
                      estimate = 0, std.error = 0, statistic = 0, p.value = 0, conf.low = 0, conf.high = 0, print_est = "Reference Level") %>% 
              
              add_row(attribute = "Precision", level = "Moderate Precision (Correct 85% Of The Time With 15% False Positives)",
                      estimate = 0, std.error = 0, statistic = 0, p.value = 0, conf.low = 0, conf.high = 0, print_est = "Reference Level") %>% 
              
              add_row(attribute = "Regulation", level = "Individual or Community Users",
                      estimate = 0, std.error = 0, statistic = 0, p.value = 0, conf.low = 0, conf.high = 0, print_est = "Reference Level")   
      
      
    }
  # Function to create conjoint figures
    create_cjPlots <- function(dat2, ## cleaned data frame from clean_cjOutput
                               depVar ## Name of dependent variable %in% c("Support", "Trust", "Trust Paradox")
                               ){
      
      dat2 %>%
        ggplot(aes(x = estimate, y = level, group = factor(attribute), color = factor(attribute), fill = factor(attribute))) +
        geom_vline(aes(xintercept = 0)) +
        geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
        geom_label(aes(label = print_est), color = 'white', fontface = 'bold') +
        xlab(paste0("Percentage Point Change in ", {{depVar}})) +
        ylab(" ") +
        facet_wrap(attribute~., scales = 'free_y', ncol = 1) +
        theme_minimal() +
        
        scale_color_nejm() +
        scale_fill_nejm() +
        
        theme(legend.position = 'none',
              strip.text = element_blank())
      
    }
    



### First, we want to make those marginal means plots. We want to normalize the scale in order to be consistent with other conjoint studies + increase ease of interpretability. I have included, but hashed out, the code to make marginal means with the original scale as well.
    
    
     ## Let's rescale everything in a new dataframe, just in case things go awry.
    
    conjoint_data3 <- conjoint_data2
    
    process <- preProcess(as.data.frame(conjoint_data3), method=c("range"))
    
    norm_scale <- predict(process, as.data.frame(conjoint_data3))
    
    norm_scale$paradox2 <- norm_scale$trust - norm_scale$support
    
    support <- lm(support ~ domain + autonomy + regulation+ precision, data = norm_scale)
    trust <- lm(trust ~ domain + autonomy + regulation+ precision, data = norm_scale)
    paradox <- lm(paradox ~ domain + autonomy + regulation+ precision, data = norm_scale)

    ## If we wanted to keep the original 5-point Likert, use this:
    
    #support <- lm(support ~ domain + autonomy + regulation+ precision, data = conjoint_data2)
    #trust <- lm(trust ~ domain + autonomy + regulation+ precision, data = conjoint_data2)
    
    
    ## Combine the two
      combined <- rbind(marginalmeans(support) %>% 
                              mutate(dv = 'Support') ,
                            
                            marginalmeans(trust) %>% 
                              mutate(dv = 'Trust')) %>% 
        
                    mutate(print_est = paste0(round(estimate, 2))) %>% 
                    mutate(term = str_to_title(term),
                           value = str_to_title(value)) 
 
      
  ## Create and export marginal means plots if we wanted to present them one by one (i.e., for slides in presentations perhaps).

    ## Support 
      create_mmPlots(support, "Support")
      #ggsave("/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/support_mm.pdf", width = 16, height = 9)
      
    
    ## Trust
      create_mmPlots(trust, "Trust")
      #ggsave("/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/trust_mm.pdf", width = 16, height = 9)
      
      
    ## ... but we ultimately end up using the combined plot in the paper.
    
      combined %>%
        mutate(value=fct_relevel(value,"Moderate Precision (Correct 85% Of The Time With 15% False Positives)",
                                 "Substantial Precision (Correct 90% Of The Time With 10% False Positives)",
                                 "Maximum Precision (Correct 99% Of The Time With 1% False Positives)")) %>%
        ggplot(aes(x = estimate, y = value, color = factor(dv),  group = factor(dv))) +  
        geom_linerange(aes(xmin = conf.low, xmax = conf.high), position = position_dodge(width = 1)) + 
        geom_point(size = 1, position = position_dodge(width = 1)) +
        xlab("Marginal Means: Degree of Support and Trust") +
        ylab(" ") +
        #facet_wrap(term~., scales = 'free_y', ncol = 1, strip.position = "left") +
        #theme(axis.text.y = element_text(angle=90, vjust=.5, hjust=1)) +
        facet_grid(term~., scales = 'free_y', switch="y")+
        theme_bw() +
        geom_stripped_rows(color = NA) +
        scale_color_nejm() +
        scale_fill_nejm() +
        xlim(0.35,0.75) +
        labs(color = "DV") +
        geom_vline(xintercept = 0.5, linetype="dotted", 
                   color = "grey52", size=1) +
        theme(legend.position = 'right',
              strip.text = element_text(face = 'bold'))
  
    ggsave("/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/trust_support_mm_normrescal_check.pdf", 
           width = 10, height = 10)
    
      
    
### Now we want to run the regression to obtain AMCEs. We use the original 5-point Likert here as we are testing a causal argument with controls (rather than showing descriptive statistics).

### We run the following models in order: regressions with demographics for support versus trust; then, a pared down set of models (to use as needed); and finally, we were asked to include mediators into a regression (though in our memo response, we suggested that this might lead to post-treatment concerns. Consequently, I have hashed the mediator models.)
    
    reg1 <- conjoint_data2 %>% 
    
    feols(support ~ i(domain, ref = 'cars') + 
            i(autonomy, ref = 'no autonomy, full human oversight') +
            i(precision, ref = 'moderate precision (correct 85% of the time with 15% false positives)') +
            i(regulation, ref = 'individual or community users') + .[demo_vars],
          cluster = ~id)
  
  
  reg2 <- conjoint_data2 %>% 
    
    feols(trust ~ i(domain, ref = 'cars') + 
            i(autonomy, ref = 'no autonomy, full human oversight') +
            i(precision, ref = 'moderate precision (correct 85% of the time with 15% false positives)') +
            i(regulation, ref = 'individual or community users') + .[demo_vars],
          cluster = ~id)
  
  modelsummary(list(reg1, reg2), stars = T, output = "/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/tab-4_check.docx")  
  
  
  # We want a model without controls or interactions
  reg3 <- conjoint_data2 %>% 
    
    feols(support ~ i(domain, ref = 'cars') + 
            i(autonomy, ref = 'no autonomy, full human oversight') +
            i(precision, ref = 'moderate precision (correct 85% of the time with 15% false positives)') +
            i(regulation, ref = 'individual or community users'),
          cluster = ~id)
  
  
  reg4 <- conjoint_data2 %>% 
    
    feols(trust ~ i(domain, ref = 'cars') + 
            i(autonomy, ref = 'no autonomy, full human oversight') +
            i(precision, ref = 'moderate precision (correct 85% of the time with 15% false positives)') +
            i(regulation, ref = 'individual or community users'),
          cluster = ~id)
  
  modelsummary(list(reg3, reg4), stars = T, output = "/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/tabNoControls_check.docx")  
  
  
  # Reviewers asked us to add the mediators in the regression as controls
  #mediator_vars <- c("FOMO", "CostBenefit", "Efficiency", "Optimism", "Transparency")
  
  
  #conjoint_data2$FOMO <- as.numeric(conjoint_data2$FOMO)
  #conjoint_data2$CostBenefit <- as.numeric(conjoint_data2$CostBenefit)
  #conjoint_data2$Efficiency <- as.numeric(conjoint_data2$Efficiency)
  #conjoint_data2$Optimism <- as.numeric(conjoint_data2$Optimism)
  #conjoint_data2$Transparency <- as.numeric(conjoint_data2$Transparency)
  
  ## In our writeup, we report (not graph) the paradox size with p-values. "Combined2" shows just that.
  
  paradox <- lm(paradox2 ~ domain + autonomy + regulation + precision, data = norm_scale)
  
  combined2 <- rbind(marginalmeans(paradox) %>% 
                     mutate(dv = 'Paradox')) %>%
   mutate(print_est = paste0(round(estimate, 1))) %>% 
                    mutate(term = str_to_title(term),
                           value = str_to_title(value)) 
 
  
  
  
  ## Create and export marginal mean for the Paradox itself if we want to add a visualization.
  create_mmPlots(paradox, "Paradox")
  
  ggsave("/Users/kapurao/Desktop/Grad School/TrustParadox/CodingConjoint/code/paradox_mm_rescale_check.pdf", width = 16, height = 9)
  
  
  
    
```






